#### ======================================================================
#### Online appendix: POP quadrant plots (China and USA, pooled cutoffs)
#### ----------------------------------------------------------------------
#### Purpose:
####   Classify and visualize countries by "public opinion potential" (POP)
####   using two summary quantities computed from weighted survey estimates:
####     (1) Preference variance (PV): SD of desired relationship (SD_Desired)
####     (2) Preference gap (PG): absolute difference between desired and
####         current relationship (|Delta|)
####
#### Design choices:
####   - Two separate figures are produced (China and USA), but the quadrant
####     cutoffs are pooled across both outcomes to make the two panels
####     directly comparable.
####   - All quantities used for placement and cutoffs are based on the
####     weighted, full-sample estimates (Method == "weighted", Subgroup == "all").
####   - Country labels use ISO3 codes; ggrepel is used to reduce overlap.
####
#### Data:
####   data/complete_aggregated_results_with_subgroups.csv
####
#### Output:
####   output/pop_types_china-6x10.pdf
####   output/pop_types_us-6x10.pdf
#### ======================================================================

#### LOAD PACKAGES ####
library(ggplot2)
library(dplyr)
library(readr)
library(tidyr)
library(ggrepel)

#### OPTIONAL: ENSURE CALIBRI IS AVAILABLE ####
#### Stops execution when the requested font family is not installed, which
#### keeps figure typography consistent across machines.
####   enforce_calibri  set to FALSE to skip the check entirely
####   base_family      font family used by every text element in the figures
enforce_calibri <- TRUE
base_family     <- "Calibri"

if (enforce_calibri) {
  if (!requireNamespace("systemfonts", quietly = TRUE)) {
    stop("Package 'systemfonts' is required to verify Calibri. Run: install.packages('systemfonts')")
  }
  #### systemfonts::match_font() hands back a substitute font file when the
  #### requested family is missing, so testing its path for emptiness can
  #### never detect an absent font. Compare the requested family against the
  #### families that are actually installed instead (case-insensitively,
  #### because family names are not consistently capitalised across systems).
  installed_families <- unique(systemfonts::system_fonts()$family)
  if (!tolower(base_family) %in% tolower(installed_families)) {
    stop(base_family, " font not found on this system.")
  }
}

#### LOAD DATA + STANDARDIZE COUNTRY NAMES ####
#### The aggregated results file holds weighted and unweighted summaries for
#### several subgroups; only the weighted full sample is used further below.
#### Country names are harmonized immediately after reading so that every
#### later step (filtering, labeling) sees the same spelling:
####   "Democratic Republic of Congo" -> "DRC"
####   "Turkey"                       -> "Türkiye"
####   "the Philippines"              -> "Philippines"
complete_results <-
  read_csv(
    file           = "data/complete_aggregated_results_with_subgroups.csv",
    show_col_types = FALSE
  ) %>%
  mutate(
    Country = recode(
      Country,
      "Democratic Republic of Congo" = "DRC",
      "Turkey"                       = "Türkiye",
      "the Philippines"              = "Philippines"
    )
  )

#### FILTER COUNTRIES + KEEP ONLY WEIGHTED, FULL SAMPLE ####
#### Keep one kind of row only: weighted estimates (Method == "weighted")
#### for the full sample (Subgroup == "all"), for the countries listed here.
#### Names must match the harmonized spellings (e.g. "DRC", "Türkiye").
countries_to_include <- c(
  "Argentina", "Belarus", "Bolivia", "Brazil", "Burkina Faso", "Cameroon",
  "Colombia", "Croatia", "DRC", "Egypt", "Georgia", "Ghana", "Haiti",
  "Hungary", "Indonesia", "Kenya", "Lebanon", "Madagascar", "Mali",
  "Mexico", "Nigeria", "Peru", "Philippines", "Poland", "Romania",
  "Serbia", "Tunisia", "Türkiye", "Uruguay", "Venezuela"
)

#### All three conditions must hold for a row to be kept.
complete_results_filtered <- filter(
  complete_results,
  Method == "weighted",
  Subgroup == "all",
  Country %in% countries_to_include
)

#### RESHAPE TO LONG (CHINA + USA) ####
#### Target layout: one row per country-outcome pair, where the outcome
#### (Country_Type) is "China" or "USA", with the columns
####   - Delta:      desired minus current relationship (signed)
####   - SD_Desired: dispersion of the desired relationship
#### plus the plotting aliases SD (= SD_Desired) and ISO3 (= ISO3_code).
#### PG = |Delta| and PV = SD_Desired are derived from these later on.
long_results <- complete_results_filtered %>%
  #### Keep the identifiers and the four outcome-specific measures only.
  select(
    Country, ISO3_code,
    China_Delta, China_SD_Desired,
    USA_Delta, USA_SD_Desired
  ) %>%
  #### Split "<Outcome>_<Metric>" column names into two key columns ...
  pivot_longer(
    cols          = -c(Country, ISO3_code),
    names_to      = c("Country_Type", "Metric"),
    names_pattern = "^(China|USA)_(Delta|SD_Desired)$",
    values_to     = "Value"
  ) %>%
  #### ... then spread the metric back out into its own columns.
  pivot_wider(names_from = Metric, values_from = Value) %>%
  mutate(SD = SD_Desired, ISO3 = ISO3_code)

#### POOLED CUTOFFS (SHARED ACROSS BOTH PLOTS) ####
#### Reference values are computed over the China and USA rows together, so
#### both figures use the same quadrant boundaries and remain comparable.
####   overall_mean_pg: pooled mean of PG = |Delta|
####   overall_mean_pv: pooled mean of PV = SD
overall_mean_pg <- long_results %>%
  pull(Delta) %>%
  abs() %>%
  mean(na.rm = TRUE)

overall_mean_pv <- long_results %>%
  pull(SD) %>%
  mean(na.rm = TRUE)

#### Band around the pooled mean PG. Despite its name, overall_sd_pg holds
#### HALF of the pooled standard deviation of |Delta|. The band is only used
#### to position the quadrant labels; it never subsets observations.
overall_sd_pg <- long_results %>%
  pull(Delta) %>%
  abs() %>%
  sd(na.rm = TRUE) * 0.5

pg_high_cutoff <- overall_mean_pg + overall_sd_pg
pg_low_cutoff  <- overall_mean_pg - overall_sd_pg

#### PLOT SETTINGS ####
#### Sizes are tuned for a 950x650px-equivalent PDF export.

#### Conversion factor between points and the millimetre-based `size` used
#### by ggplot2 text layers (points per millimetre).
.pt <- 2.845276

#### Theme text sizes, in points.
axis_text_size <- 8
base_size      <- 10

#### Country labels: target size in points, and the value handed to `size`.
label_size_pt <- 9.0
label_size    <- label_size_pt / .pt

#### Quadrant titles: target size in points, and the value handed to `size`.
quad_size_pt <- 10.0
quad_size    <- quad_size_pt / .pt

#### SPLIT DATA ####
#### One dataset per outcome; each feeds its own figure.
long_results_china <- filter(long_results, Country_Type == "China")
long_results_usa   <- filter(long_results, Country_Type == "USA")

#### SHARED SCALES ####
#### Axis limits are taken from the pooled data (both outcomes together) so
#### the two figures share identical axes: x spans PV (SD), y spans PG (|Delta|).
x_limits <- range(long_results[["SD"]], na.rm = TRUE)
y_limits <- range(abs(long_results[["Delta"]]), na.rm = TRUE)

#### FIXED QUADRANT LABEL POSITIONS ####
#### Quadrant titles are placed at fixed locations derived from pooled cutoffs.
#### The same four anchor points are used in both figures.
####   x: 20% to the left / right of the pooled mean PV (ratio, not offset).
####   y: the PG band edge, pushed out by another overall_sd_pg and a
####      hand-tuned constant (0.3 upward, 0.2 downward). Because
####      overall_sd_pg is half the pooled SD of PG, the anchors sit roughly
####      one full SD above / below the pooled mean PG, plus the constant.
#### NOTE(review): these positions are not checked against x_limits/y_limits;
#### a label whose anchor falls outside the plotted range may be cut off.
quad_x_left   <- overall_mean_pv / 1.2
quad_x_right  <- overall_mean_pv * 1.2
quad_y_top    <- pg_high_cutoff + overall_sd_pg + 0.3
quad_y_bottom <- pg_low_cutoff  - overall_sd_pg - 0.2

#### PLOT FUNCTION ####
#### make_pop_plot_country(): POP quadrant plot for a single outcome
#### (China or USA), drawn with the pooled cutoffs and shared axis limits.
####
#### Arguments:
####   data  Data frame with one row per country and the columns
####           SD    - preference variance (x axis)
####           Delta - signed preference gap; plotted as abs(Delta) (y axis)
####           ISO3  - text label drawn next to each point
####   seed  Random seed forwarded to ggrepel. Label repulsion uses random
####         jitter, so without a fixed seed the label layout changes from
####         run to run. Pass NA to restore the unseeded behaviour.
####
#### Returns:
####   A ggplot object (nothing is printed or saved here).
####
#### Script-level objects read by this function:
####   label_size, quad_size, base_family, base_size, axis_text_size,
####   overall_mean_pg, overall_mean_pv, x_limits, y_limits,
####   quad_x_left, quad_x_right, quad_y_top, quad_y_bottom
make_pop_plot_country <- function(data, seed = 42) {
  #### Quadrant titles (POP typology used in the paper), listed in the order
  #### top-left, top-right, bottom-left, bottom-right.
  quadrant_titles <- data.frame(
    x     = c(quad_x_left, quad_x_right, quad_x_left, quad_x_right),
    y     = c(quad_y_top, quad_y_top, quad_y_bottom, quad_y_bottom),
    label = c(
      "Type 2\nHigh Harmonious POP",
      "Type 3\nHigh Contentious POP",
      "Type 1\nLow POP",
      "Type 4\nMedium Contentious POP"
    ),
    stringsAsFactors = FALSE
  )

  ggplot(
    data,
    aes(
      x     = SD,
      y     = abs(Delta),
      label = ISO3
    )
  ) +
    geom_point(size = 1.5, colour = "black") +
    geom_text_repel(
      size          = label_size,
      family        = base_family,
      colour        = "black",
      max.overlaps  = Inf,
      box.padding   = 0.03,
      point.padding = 0.03,
      segment.size  = 0.15,
      seed          = seed
    ) +
    #### Reference lines define the pooled quadrant boundaries.
    geom_hline(yintercept = overall_mean_pg, linetype = "dashed", colour = "black") +
    geom_vline(xintercept = overall_mean_pv, linetype = "dashed", colour = "black") +
    labs(
      x = "Preference Variance\n(Standard Deviation of Desired Relationship)",
      y = "Preference Gap\nABS(Desired Relationship - Current Relationship)"
    ) +
    #### coord_cartesian() zooms without dropping observations.
    coord_cartesian(xlim = x_limits, ylim = y_limits) +
    theme_minimal(base_family = base_family, base_size = base_size) +
    theme(
      axis.text        = element_text(size = axis_text_size, colour = "black"),
      panel.grid.major = element_line(colour = "grey85", linewidth = 0.3),
      panel.grid.minor = element_blank(),
      legend.position  = "none"
    ) +
    #### All four quadrant titles share one styling, so draw them in a
    #### single vectorised annotation.
    annotate(
      "text",
      x        = quadrant_titles$x,
      y        = quadrant_titles$y,
      label    = quadrant_titles$label,
      size     = quad_size,
      fontface = "bold",
      family   = base_family
    )
}

#### CHINA PLOT ####
#### POP quadrant plot for China outcome, using pooled cutoffs and shared axes.
china_pop_plot <- make_pop_plot_country(long_results_china)
print(china_pop_plot)

#### Make sure the output folder exists so the export does not fail (or
#### stop to ask) on a fresh checkout. This is a no-op when it already exists.
dir.create("output", showWarnings = FALSE, recursive = TRUE)

#### Export at 950x650 px expressed in inches at 96 px per inch.
ggsave(
  "output/pop_types_china-6x10.pdf",
  plot   = china_pop_plot,
  width  = 950/96,
  height = 650/96,
  units  = "in",
  dpi    = 300,
  device = cairo_pdf
)

#### USA PLOT ####
#### POP quadrant plot for USA outcome, using pooled cutoffs and shared axes.
us_pop_plot <- make_pop_plot_country(long_results_usa)
print(us_pop_plot)

#### Make sure the output folder exists so the export does not fail (or
#### stop to ask) on a fresh checkout. This is a no-op when it already exists.
dir.create("output", showWarnings = FALSE, recursive = TRUE)

#### Export at 950x650 px expressed in inches at 96 px per inch.
ggsave(
  "output/pop_types_us-6x10.pdf",
  plot   = us_pop_plot,
  width  = 950/96,
  height = 650/96,
  units  = "in",
  dpi    = 300,
  device = cairo_pdf
)
